* generate different health insurance measures;
* use lags to give people a sense of the health insurance opportunity set;
* lagged eprhi is denoted leprhi;
* leprhi = measurement error coding and use first observed health insurance;
* leprhix = raw data;
* leprhixx = no measurement error, use first observed health insurance;   
* leprhixxx =measurement error coding, last period's health insurance;

# delimit ;  
* Session setup: commands are semicolon-delimited from here on. Defines paths, opens the log and loads merge6;
set more 1 ;  
* UNIX is not referenced anywhere else in the visible part of this file;
local UNIX 0;
local useZ 0; * selects the lfpr measure near the end of the file: 1 copies lfprz into lfpr, 2 copies lfprzz, any other value leaves lfpr unchanged;
drop _all;
clear all;
capture log close;
*set maxvar 32000;
set mem 1000m;
* Root folder of the project data. Exactly one of the global folder lines below is left uncommented;
*global folder "C:\Users\Ali\Documents";

*global folder "C:\Users\Eric\Dropbox\";
 global folder "C:\Dropbox";
*global folder "C:\research";
*global folder "C:\Users\Jeremy\Dropbox";
*global folder "\\econ-san-fs\Home4\uctpkp0\SystemFolder\Windows7\Desktop";



* Directory locals built from the global folder. Each ends in a backslash so a file name can be appended directly;
* Only saved is used in the visible part of this file (log file, input merge6, outputs merge7a and merge7);
         local saved  "$folder\hrs\merge\";
         local trackd "$folder\hrs\tracker\";
         local wave1d "$folder\hrs\wave1\";
         local wave2d "$folder\hrs\wave2\";
         local wave3d "$folder\hrs\wave3\";
         local wave4d "$folder\hrs\wave4\";
         local wave5d "$folder\hrs\wave5\";
		 local wave6d "$folder\hrs\wave6\";
         local wave7d "$folder\hrs\wave7\";
         local wave8d "$folder\hrs\wave8\";
		 local wave9d "$folder\hrs\wave9\";
		 local wave10d "$folder\hrs\wave10\";
		 local wave11d "$folder\hrs\wave11\";
		 local wave12d "$folder\hrs\wave12\";
		 local wave13d "$folder\hrs\wave13\";		 
		 
         local aheadw1 "$folder\hrs\aheadw1\";
         local aheadw2 "$folder\hrs\aheadw2\";
		 
* Clear any programs left in memory so that makebyr can be defined further down without a name clash;
program drop _all;
*recode age and health insurance data;
log using `saved'merge7.log, replace ;
use `saved'merge6;
* drop the following;
*gen numwave=7+useRAND;
* numwave is the cut-off used in the lag construction below: rows with wave<=numwave are handled as "real" waves and rows with wave>numwave as "fake" waves;
gen numwave=20;

sum age;



*********************************************;
* Code up insurance data ********************;
*********************************************;

**********************************************;
* eric to self: is the linstie coding correct?;
* will it continue to generate "tied" people, 2 years after they left their job? (when it should only be 1?);  

* Build one-period lags of each insurance indicator. The new variable name is the letter l followed by the source name;
* (lepins leprhi linstie linsnone linspriv linssp linsgov, created in that order);

* "real" waves (wave<=numwave): the lag is the value in the same person's immediately preceding wave;
* one sort is enough here because gen and replace on other variables leave the data sorted by indnum wave;
sort indnum wave;
foreach src in epins eprhi instie insnone inspriv inssp insgov {;
	gen l`src'=.;
	replace l`src'=`src'[_n-1] if indnum==indnum[_n-1] & wave==wave[_n-1]+1 & wave<=numwave;
};



* "fake" waves (wave>numwave): the lag is the value in the same person's row whose age1 is exactly one lower;
sort indnum age1;
foreach src in epins eprhi instie insnone inspriv inssp insgov {;
	replace l`src'=`src'[_n-1] if indnum==indnum[_n-1] & age1==age1[_n-1]+1 & wave>numwave;
};

***************************************************************************;
*try to correct for measurement error in the health insurance type measure;
* llfpr is lfpr taken from the same person's row whose age1 is exactly one lower. It stays missing when no such row exists;
sort indnum age1;
gen llfpr=.;
replace llfpr=lfpr[_n-1] if indnum==indnum[_n-1] & (age1==age1[_n-1]+1);

* Rows coded as tied (linstie==1) but with llfpr==0 are moved to the none category;
* linsnone must be updated first because the second statement changes linstie, which both conditions test;
replace linsnone=1 if linstie==1 & llfpr==0 & indnum==indnum[_n-1] & (age1==age1[_n-1]+1);
replace linstie=0 if linstie==1 & llfpr==0 & indnum==indnum[_n-1] & (age1==age1[_n-1]+1);

***********************************************************************;
*** RECODE HEALTH INSURANCE STATUS HERE *******************************;
*replace leprhi=linssp if (leprhi==0|leprhi==.) & linssp==1; * this one is done in merge6;
* Rows with linspriv==1 are put in the none category whenever linsnone is 0 or missing;
replace linsnone=linspriv if (linsnone==0|linsnone==.) & linspriv==1;

* Make leprhi, linstie and linsnone consistent with each other: each one is set to 0 when either of the;
* other two equals 1, and to 1 when both of the other two equal 0. The statements run in sequence, so;
* each one sees the values written by the statements before it;
replace linstie=0 if leprhi==1|linsnone==1;
replace linstie=1 if leprhi==0 & linsnone==0;
replace linsnone=0 if leprhi==1|linstie==1;
replace linsnone=1 if leprhi==0 & linstie==0;
*still need to think through private insurance;
* replace linspriv=1 if ;
replace leprhi=0 if linstie==1|linsnone==1;
replace leprhi=1 if linstie==0 & linsnone==0;

***********************************************************************;
* GENERATE ADDITIONAL HEALTH INSURANCE MEASURES FOR ROBUSTNESS CHECKS *;
sort indnum age wave;
* Snapshot the three recoded indicators before the measurement error corrections are applied;
* Creates leprhix linstiex linsnonex first, then leprhixx linstiexx linsnonexx;
foreach sfx in x xx {;
	foreach base in leprhi linstie linsnone {;
		gen `base'`sfx'=`base';
	};
};



***********************************************************************;
* MEASUREMENT ERROR CORRECTIONS HERE !!!!!!!!!!!!!! *******************;
***********************************************************************;
*** eprhi question is asked differently waves 1-3 vs 4-10***************;
* goodwave is 1 for the waves listed below and 0 for every other row (including rows with missing wave);
*replace goodwave=1 if (wave==1|wave==2|wave==3|wave==7|wave==8|wave==12|wave==13);
gen goodwave=inlist(wave,1,2,3,102,103,202,203);

* Compare the lagged insurance measures by age group (under 65 versus 65 and over) and by goodwave;
summarize lepins leprhi linstie if goodwave==1 & age<65;
summarize lepins leprhi linstie if goodwave==0 & age<65;
summarize lepins leprhi linstie if goodwave==1 & age>64;
summarize lepins leprhi linstie if goodwave==0 & age>64;

* do measurement error imputations for those under age 66;
* note that the question individuals are asked refers to "...will you have this insurance to age 65?";
* This is only a meaningful statement for those 63 or younger;
* At 64, COBRA law says that you will have retiree coverage until 65;
* Note, however, that I am using lagged health insurance coverage, ;
* so age less than 66 seems like the right cut-off;
* NOTE(review): the note preceding this block argues for an age cut-off of "less than 66", but agecut is 65;
* and the tests below are age<agecut, i.e. under 65 - confirm which is intended;
gen agecut=65;
*gen age=age1;

* Mean of leprhi among rows with lepins==1 and age<agecut, computed separately for goodwave==1 (meaneprhis);
* and goodwave==0 (meaneprhi). egen leaves the result missing on rows outside each if condition;
egen meaneprhis=mean(leprhi) if age<agecut & goodwave==1 & lepins==1;
egen meaneprhi=mean(leprhi) if age< agecut & goodwave==0 & lepins==1;

* give everyone these values;
* Each mean is a single number, so it is copied into the missing rows in two passes: a forward fill in;
* indnum wave order, then a second fill after reversing the row order. oindnum and owave exist only to;
* serve as descending sort keys;
sort indnum wave;
replace meaneprhis=meaneprhis[_n-1] if meaneprhis==.;
replace meaneprhi=meaneprhi[_n-1] if meaneprhi==.;
gen oindnum=1-indnum;
gen owave=1-wave;
sort oindnum owave;
replace meaneprhis=meaneprhis[_n-1] if meaneprhis==.;
replace meaneprhi=meaneprhi[_n-1] if meaneprhi==.;

* now generate a measurement error probability for waves 1 and 2;
* probability of linstie given measured leprhi for someone in waves 1 or 2;
* Computed as the proportional excess of the goodwave==1 mean over the goodwave==0 mean;
gen probtieds=(meaneprhis-meaneprhi)/meaneprhis; 

* now use the above calculation to get estimated health insurance status for everyone in waves 1 and 2;
* For rows with goodwave==1 and leprhi==1, linstie becomes probtieds and leprhi becomes 1-probtieds, so both;
* can be fractional afterwards. linstie is replaced first because both conditions test leprhi==1;
* The replacement condition does not repeat the age or lepins restrictions used when computing the means;
gen temp1=.;
replace temp1=probtieds if leprhi==1;
gen temp2=.;
replace temp2=(1-probtieds) if leprhi==1;
replace linstie=temp1 if leprhi==1 & goodwave==1;
replace leprhi=temp2 if leprhi==1 & goodwave==1;

* double check that all the missings are filled in that can be filled in;
* this step shouldn't do anything;
* Each statement fills one indicator only where it is still missing and the other two determine it;
* The "or" clauses are wrapped in their own parentheses because Stata evaluates & before |. Without them;
* the missing-value test applied only to the first alternative, so the second alternative could overwrite;
* values that were not missing;
replace linsnone=0 if linsnone==. & ((leprhi~=0 & leprhi~=.)|(linstie~=0 & linstie~=.));
replace linsnone=1 if linsnone==. & (leprhi==0) & (linstie==0);
replace linstie=0 if linstie==. & ((leprhi==1)|(linsnone==1));
replace linstie=1 if linstie==. & (leprhi==0) & (linsnone==0);
replace leprhi=0 if leprhi==. & ((linstie==1)|(linsnone==1));
replace leprhi=1 if leprhi==. & (linstie==0) & (linsnone==0);
* ins is the sum of the three indicators and is tabulated as a check on the coding;
gen ins=leprhi+linstie+linsnone;
tab ins;
tab leprhi;
tab linstie;
tab linsnone;



summarize medcare;
*exit;


***********************************************************************;
* GENERATE ADDITIONAL HEALTH INSURANCE MEASURES FOR ROBUSTNESS CHECKS *;
* Snapshot of the indicators after the measurement error coding and before the first-value carry forward;
* Creates leprhixxx linstiexxx linsnonexxx in that order;
sort indnum age wave;
foreach base in leprhi linstie linsnone {;
	gen `base'xxx=`base';
};

***************************************************************************;
* John's suggestion on coding -give everyone their first value of insurance;
* replace works through the rows in order, so a value copied from row _n-1 is itself available to row _n+1;
* and the earliest usable value cascades down each person's rows;

sort indnum age wave;
* leprhi: copy the previous row's value when that row has non-missing leprhi and non-missing lfpr;
replace leprhi=leprhi[_n-1] if leprhi[_n-1]~=. & lfpr[_n-1]~=. & indnum==indnum[_n-1];
* linstie: copy the previous row's value only when the previous row has lfpr==1;
replace linstie=linstie[_n-1] if linstie[_n-1]~=. & lfpr[_n-1]==1  & indnum==indnum[_n-1];
* linstie is set to missing when the previous lfpr is missing and the previous linstie is anything other than 0;
replace linstie=. if (linstie[_n-1]~=0 & lfpr[_n-1]==.) & indnum==indnum[_n-1];
* linstie is set to 0 when the previous row has lfpr==0;
replace linstie=0 if lfpr[_n-1]==0 & indnum==indnum[_n-1];
* linstie is set to 0 when the previous linstie is 0 and the previous lfpr is known;
replace linstie=0 if (linstie[_n-1]==0 & lfpr[_n-1]~=.) & indnum==indnum[_n-1];
* linsnone is recomputed as the residual so that the three indicators sum to 1 wherever both others are known;
replace linsnone=1-(leprhi+linstie);

* insx is the sum of the three indicators, tabulated as a check;
gen insx=leprhi+linstie+linsnone;
tab insx;

* Same sequence of steps applied to the xx family (copies taken before the measurement error coding);
sort indnum age wave;
replace leprhixx=leprhixx[_n-1] if leprhixx[_n-1]~=. & lfpr[_n-1]~=. & indnum==indnum[_n-1];
replace linstiexx=linstiexx[_n-1] if linstiexx[_n-1]~=. & lfpr[_n-1]==1  & indnum==indnum[_n-1];
replace linstiexx=. if (linstiexx[_n-1]~=0 & lfpr[_n-1]==.) & indnum==indnum[_n-1];
replace linstiexx=0 if lfpr[_n-1]==0 & indnum==indnum[_n-1];
replace linstiexx=0 if (linstiexx[_n-1]==0 & lfpr[_n-1]~=.) & indnum==indnum[_n-1];
replace linsnonexx=1-(leprhixx+linstiexx);

gen insxx=leprhixx+linstiexx+linsnonexx;
tab insxx;

* end of measurement error coding;
sum leprhi linstie linsnone;
sum leprhixx linstiexx linsnonexx;







*generate age*health;
*gen ageb=age1*healw;
*gen ageg=age1*(1-healw);



*generate birthyear dummies;

* byear is realyear minus age1, with every value below 1 set to 1 (missing values stay missing);
gen byear=realyear-age1;
replace byear=1 if byear<1;

* makebyr creates the indicator variables b22 through b47, where bN is 1 on rows with byear equal to N and 0 otherwise;
program define makebyr;
	version 3.1;
	local yr = 22;
	while `yr' <= 47 {;
		gen b`yr' = (byear == `yr');
		local yr = `yr' + 1;
	};
end;
makebyr;

* generate age polynomial, good and bad health;
* I don't think I use the polynomial right now;
/* rename ageg ageg1;
gen ageg2=ageg1*ageg1;
gen ageg3=ageg2*ageg1;
gen ageg4=ageg3*ageg1;
gen ageg5=ageg4*ageg1;
gen ageg6=ageg5*ageg1;
gen ageg7=ageg6*ageg1;
gen ageg8=ageg7*ageg1;
gen ageg9=ageg8*ageg1;
gen ageg10=ageg9*ageg1;
gen ageg11=ageg10*ageg1;
gen ageg12=ageg11*ageg1;
gen ageg13=ageg12*ageg1;

rename ageb ageb1;
gen ageb2=ageb1*ageb1;
gen ageb3=ageb2*ageb1;
gen ageb4=ageb3*ageb1;
gen ageb5=ageb4*ageb1;
gen ageb6=ageb5*ageb1;
gen ageb7=ageb6*ageb1;
gen ageb8=ageb7*ageb1;
gen ageb9=ageb8*ageb1;
gen ageb10=ageb9*ageb1;
gen ageb11=ageb10*ageb1;
gen ageb12=ageb11*ageb1;
gen ageb13=ageb12*ageb1;
*/

* healb is 1 where healw equals 1 and healg is 1 where healw equals 0. Both are 0 on every other row, including missing healw;
gen healb=(healw==1);
gen healg=(healw==0);

* Restrict the dataset to the variables listed below. The keep command runs over five lines up to the semicolon;
* The wildcards also retain helper variables created above, for example agecut through ag* and ins, insx, insxx through ins*;
keep spouse* winc ag* indnum age1 heal* lhours lfpr lfprz lfprzz lfprw1 llfpr married b* realyear wave ass* 
wage* ins* eprhi* epins lins* leprhi* iprem* oopd* drugc oop medcost* _* haz enter lnew 
work male HHID PN insmedcd nursing hosp drtimes outsurg dentist nodrug workpref
realage tenure socc switch reason job same*  rcen*
liquid stock bonds re wheels exasset debt bus house nohouse IRA pen* dead vla hrscoh SScov SSY* V* medcare medicaid disabled initobs;

sort realyear;
* by realyear: summ instie linstie;

*drop if work==2; * for checking whether results are driven by the self-employed;

* test is the gap between realage and age1. It is only used by the commented-out lines below but stays in the data;
gen test=realage-age1; *  check the importance of worrying about month;
*replace lfpr=. if test<.2;
*replace haz=. if test<.2;

sort indnum age1;



sum _socy if age>65 & age<100;




sum male married lfpr lfprz lfprzz  leprhi assets;
* missing variable stuff;
* lfpr is summarised by ageg before and after each group of drops to show how the sample changes;
sort ageg;
by ageg: sum lfpr;

* Drop rows where all three insurance indicators are exactly 0;
drop if (leprhi==0 & linstie==0 & linsnone==0);
sum male married lfpr leprhi assets;
sum linsnone linstie leprhi;



tab age;

* Drop rows where any of the three insurance indicators is missing;
drop if linsnone==.;
drop if leprhi==.;
drop if linstie==.;


tab age;

* exit;

sort ageg;
by ageg: sum lfpr;

* Wave by wave comparison of current and lagged tied coverage for rows with hrscoh==1, wave<11 and dead not equal to 1;
sort wave;
by wave: sum healw  instie linstie linstiexx if hrscoh==1 & wave<11 & dead~=1;

/*
drop if wave>7;
drop if hrs~=11;
sort eprhi;
by eprhi: sum medco* if hrscoh==11 & (age<85 & age>74) & lfpr==0;
sort leprhixx;
by leprhixx: sum medco* if hrscoh==11 & (age<85 & age>74) & lfpr==0;
*/


* figure out people who have retiree coverage, but are older than 65;
* for individuals older than 65, presence of eprhi is not reported;
* we assume they have retiree coverage if;
* at time t, they have employer provided insurance but are not working;
* at time t+2, they still have employer provided coverage;
* those who we cannot identify as having retiree coverage, we will consider as ;
* having tied coverage;

* NOTE: WE NEED TO DO THIS STUFF BELOW ONLY FOR WAVES 1-7;

sort indnum realyear;
* Set leprhixx to 1 on rows with age>65, wave<=20 and hrscoh not equal to 1 when the same person's previous;
* row (in realyear order) has lfpr==0 and epins==1;
* NOTE(review): the description preceding this block also requires employer coverage at t+2, but this condition;
* only looks at the previous row - confirm that this matches the intended rule;
replace leprhixx=1 if (hrscoh~=1 & lfpr[_n-1]==0 & epins[_n-1]==1 & indnum==indnum[_n-1] & age>65 & wave<=20);
* Carry a value of 1 forward to the person's later rows with age>65;
replace leprhixx=leprhixx[_n-1] if leprhixx[_n-1]==1 & indnum==indnum[_n-1] & age>65;
* omwave is a descending sort key for wave. After this sort the same statement carries a value of 1;
* back to the person's earlier rows with age>65. omwave is not dropped and is saved with the data;
gen omwave=1-wave; 
sort indnum omwave;
replace leprhixx=leprhixx[_n-1] if leprhixx[_n-1]==1 & indnum==indnum[_n-1] & age>65;
* NOTE(review): this tests linsnone rather than linsnonexx, and linsnonexx is not updated after leprhixx changes;
* confirm whether the xx family was meant to be used here;
replace linstiexx=0 if leprhixx==1 | linsnone==1;
sort indnum wave;


sum _socy if age>65 & age<100;


compress;
* this is the dataset we use for estimating the distribution of medical expenses;
* for "the effects of health insurance and self-insurance on retirement behavior" ;
save `saved'merge7a, replace; 



* now using all cohorts;
* drop if (hrscoh~=1 & hrscoh~=12);
* Estimation sample: age1 between 51 and 69 inclusive (rows with missing age1 are removed) and not recorded as dead;
keep if inrange(age1,51,69);
keep if dead~=1;
summarize male married lfpr leprhi assets;

* lfpr by ageg is reported after each sample restriction;
bysort ageg: summarize lfpr;
drop if assets==.;
bysort ageg: summarize lfpr;

***************************decide which version of lfpr we wish to use z or no z;

* useZ equal to 1 swaps in lfprz, useZ equal to 2 swaps in lfprzz, any other value keeps lfpr as it is;
if `useZ'==1 {;
	replace lfpr=lfprz;
};
else if `useZ'==2 {;
	replace lfpr=lfprzz;
};

drop if lfpr==.;
bysort ageg: summarize lfpr;
summarize male married lfpr leprhi assets;



* this is the dataset we use for estimating life cycle profiles;
* for "the effects of health insurance and self-insurance on retirement behavior" ;
saveold `saved'merge7, replace; 
*drop _all;
log close ;


